********************************************************************************
* Project: Structural Cointegration of the Climate--Carbon Feedback
* Authors: S. Nakano and K. Nishimura
* Description: Replication code for VECM estimation, IV-GMM identification, 
* and diagnostic testing.
* Data Source: Jouzel et al. (2007), Luethi et al. (2008), Laskar et al. (2004)
********************************************************************************

clear all
set more off
capture log close

* --- 1. Data Preparation: Generating Master Grid (100-year resolution) ---
set obs 10001
gen cyear = 2000 - (_n-1)*100
label var cyear "Year AD (100-year based grid)"
sort cyear
save "temp_master_grid.dta", replace

* --- 2. Processing Temperature Data (EPICA Dome C) ---
import delimited "Temperature_raw.csv", clear
gen year_tmp = 1950 - age_calbp
gen cyear = floor(year_tmp / 100) * 100
collapse (mean) temperature, by(cyear)
save "temp_processed.dta", replace

* --- 3. Processing CO2 Data (Composite Record) ---
import delimited "CO2_raw.csv", clear
gen year_tmp = 1950 - gasageyrbp
gen cyear = floor(year_tmp / 100) * 100
collapse (mean) co2ppmv, by(cyear)
save "co2_processed.dta", replace

* --- 4. Processing Insolation Data (Laskar et al. 2004) ---
import delimited "Insolation_raw.csv", clear
rename year cyear
* Aggregate all solar forcing variables
collapse (mean) n* s* zero*, by(cyear)
save "insol_processed.dta", replace

* --- 5. Merging Datasets ---
use "temp_master_grid.dta", clear
merge 1:1 cyear using "temp_processed.dta", keep(master match) nogenerate
merge 1:1 cyear using "co2_processed.dta", keep(master match) nogenerate
merge 1:1 cyear using "insol_processed.dta", keep(master match) nogenerate

* --- 6. Time-series Configuration ---
sort cyear
tsset cyear, delta(100)
gen lnco2 = ln(co2ppmv)
rename temperature tem
label var tem "Antarctic Temperature Anomaly (K)"
label var lnco2 "Natural log of CO2 (ppmv)"

* Define Insolation Instruments
gen iv_june60 = n60june
gen iv_dec60  = n60dec
gen iv_dec30  = n30dec
* Note: Supplemental instruments are defined within sample blocks

* ==============================================================================
* Table 1 & 2: Unit-root, Cointegration, and Lag Selection (130 ka Gap-free Sample)
* ==============================================================================
preserve
    * Construct 1,000-year resolution grid for VECM
    gen year_g = floor(cyear / 1000) * 1000
    collapse (mean) tem lnco2 iv_june60 iv_dec60, by(year_g)
    keep if inrange(year_g, -130000, 1000)
    tsset year_g, delta(1000)
    
    * Unit-root Tests (ADF)
    dfuller tem        // Z(t) = -1.227, p = 0.662
    dfuller lnco2      // Z(t) = -0.565, p = 0.879
    
    * Cointegration Test (Engle-Granger)
    egranger tem lnco2 // Z(t) = -3.176 (Significant at 10% level)
    
    * Lag Order Selection
    varsoc tem lnco2   // Lag 3 selected by AIC/HQIC/SBIC
restore

* ==============================================================================
* Table 3: VECM Estimation (130 ka vs. 800 ka Samples)
* ==============================================================================
* --- Analysis for 130 ka Gap-free Sample ---
preserve
    gen year_g = floor(cyear / 1000) * 1000
    collapse (mean) tem lnco2 iv_june60, by(year_g)
    keep if inrange(year_g, -130000, 1000)
    tsset year_g, delta(1000)

    * VECM with 3 lags and 1 cointegrating rank
    vec tem lnco2 iv_june60, lags(3) rank(1) 
    * ESS Result: beta_lnco2 = -18.748 (Antarctic ESS = 13.0 K)
restore

* --- Analysis for 800 ka Interpolated Sample ---
preserve
    ipolate tem cyear, gen(tem_i)
    ipolate lnco2 cyear, gen(lnco2_i)
    gen year_g = floor(cyear / 1000) * 1000
    collapse (mean) tem=tem_i lnco2=lnco2_i (mean) iv_june60, by(year_g)
    tsset year_g, delta(1000)

    vec tem lnco2 iv_june60, lags(3) rank(1)
    * ESS Result: beta_lnco2 = -20.251
restore

* ==============================================================================
* Table 4: Identification of CCR (a21) via IV and OLS (130 ka Sample)
* ==============================================================================
preserve
    gen year_g = floor(cyear / 1000) * 1000
    collapse (mean) tem lnco2 n60dec, by(year_g)
    keep if inrange(year_g, -130000, 1000)
    tsset year_g, delta(1000)

    * IV Estimation using Orbital Forcing and Lagged Temperature
    ivreg2 D.lnco2 (D.tem = n60dec L2.tem), endog(D.tem)
    * Endogeneity test p-value = 0.864 (Exogeneity cannot be rejected)

    * OLS Estimation (Justified by endogeneity test)
    reg D.lnco2 D.tem, robust
    * Result: a21 = 0.0208 (z = 8.04, p < 0.01)
restore

* ==============================================================================
* Table 5: GMM Joint Estimation of a12 (CTR) and a21 (CCR)
* ==============================================================================
preserve
    gen year_g = floor(cyear / 1000) * 1000
    collapse (mean) tem lnco2 iv_june60, by(year_g)
    keep if inrange(year_g, -130000, 1000)
    tsset year_g, delta(1000)

    * Extract residuals from reduced-form VECM
    quietly vec tem lnco2 iv_june60, lags(3) rank(1)
    predict r1, res equation(#1) // Temperature residual
    predict r2, res equation(#2) // CO2 residual
    
    * GMM Identification via Causality Allocation Curve (CAC)
    gmm (eq1: D.lnco2 - {a21}*D.tem - {b0}) ///
        (eq2: r1*r2*(1 + {a12}*{a21}) - {a21}*r1*r1 - {a12}*r2*r2), ///
        instruments(eq1: D.tem) instruments(eq2: ) winitial(identity) vce(robust)
    
    * Wald Test for Null-CTR Hypothesis (H0: a12 = 0)
    test _b[/a12] = 0 // chi2 = 0.01, p = 0.9303 (Cannot reject a12 = 0)
restore

* ==============================================================================
* Data Export for Visualization (Figure 2)
* ==============================================================================
* Exporting gap analysis for gap-free sample justification
preserve
    gen year_g = floor(cyear / 1000) * 1000
    collapse (max) has_tem=tem has_co2=lnco2, by(year_g)
    gen gap_free_interval = (has_tem != . & has_co2 != .)
    [cite_start]export delimited using "data_availability_1000yr.csv", replace
restore

exit
